package pl.pelcra.cesar.spider.webparser.sites;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import org.jsoup.nodes.Element;

import pl.pelcra.cesar.spider.webcrawler.CSWebURL;
import pl.pelcra.cesar.spider.webparser.interfaces.BasePageParser;
import pl.pelcra.cesar.tools.Tools;
import edu.uci.ics.crawler4j.url.UrlResolver;

/**
 * Page parser for documents served by the European Parliament site
 * (europarl.europa.eu).
 * <p>
 * {@link #init(CSWebURL, String, String)} configures the selectors used to
 * pull content, date and title out of the fetched page, and decides whether
 * the document id is read from the page itself or derived from the URL-based
 * id handed in by the caller.
 */
public class EuroparlPageParser extends BasePageParser {

	/**
	 * Categories whose document id is derived from the URL-based id passed to
	 * {@link #init(CSWebURL, String, String)} rather than read from the
	 * reference element of the page.
	 */
	private static final Set<String> URL_DOC_ID_CATEGORIES = Collections
			.unmodifiableSet(new HashSet<String>(Arrays.asList("Agenda",
					"Agenda2004-2009", "Newsletter", "Briefing",
					"Post-briefing", "Briefing2004-2009",
					"Post-briefing2004-2009")));

	/** URL prefix removed from a URL-based document id. */
	private static final String GET_DOC_PREFIX = "http://www.europarl.europa.eu/sides/getDoc.do?";

	/** Query parameter (lower-cased) when it follows another parameter. */
	private static final String LANGUAGE_PARAM_TRAILING = "&language=en";

	/** Query parameter (lower-cased) when it precedes another parameter. */
	private static final String LANGUAGE_PARAM_LEADING = "language=en&";

	/** Number of trailing URL characters taken as the language code. */
	private static final int LANGUAGE_CODE_LENGTH = 2;

	/** Creates a parser without parameters. */
	public EuroparlPageParser() {

	}

	/**
	 * Creates a parser, forwarding both arguments to the base class.
	 *
	 * @param parameters
	 *            parser parameters, passed unchanged to the superclass
	 * @param politenessDelay
	 *            politeness delay, passed unchanged to the superclass
	 */
	public EuroparlPageParser(String parameters, int politenessDelay) {
		super(parameters, politenessDelay);
	}

	/**
	 * Fetches the page and, if that succeeded, configures every field of the
	 * parsed document.
	 * <p>
	 * A missing {@code Category} parameter is treated like any category that
	 * is not listed in {@link #URL_DOC_ID_CATEGORIES}: the document id is read
	 * from the page's reference element.
	 *
	 * @param webURL
	 *            URL of the page to parse
	 * @param language
	 *            language supplied by the caller; see
	 *            {@link #setLanguage(String)} for how it is used
	 * @param doc_id
	 *            URL-based document id, used only for the categories listed
	 *            in {@link #URL_DOC_ID_CATEGORIES}
	 */
	@Override
	public void init(CSWebURL webURL, String language, String doc_id) {
		super.init(webURL, "div[class^=ep_box ep_product]");

		if (!super.isGetPageSuccess()) {
			return;
		}

		super.setContentTag("div[class=ep_box_body]");

		String categoryName = super.getMapParams().get("Category");
		// HashSet.contains(null) is simply false, so an absent category no
		// longer fails with a NullPointerException.
		if (URL_DOC_ID_CATEGORIES.contains(categoryName)) {
			this.setURLDocId(doc_id);
		} else {
			this.setDoc_id("div[class=ep_reference]");
		}

		super.setCategory(categoryName);
		this.setLanguage(language);
		super.setSource_id(super.getMapParams().get("SourceId"));

		super.setPage_dateTag("span[class=ep_date]");
		super.setParsed_pagedateFormat("dd-MM-yyyy - HH:mm");
		super.setTitleTag("h1[class=ep_title]");
		super.setAuthorTag("");

		// Notes are reset before setUrlAttachment, which may append to them.
		super.setNotes("");
		this.setUrlAttachment("a[target=_blank]");
	}

	/**
	 * Reads the document id from the own text of the element matched by the
	 * given selector. Nothing is set when no element matches.
	 *
	 * @param doc_idTag
	 *            selector of the element holding the document id
	 */
	@Override
	public void setDoc_id(String doc_idTag) {
		Element element = getElement(doc_idTag);
		if (element != null) {
			// String.trim() does not strip U+00A0, so non-breaking spaces are
			// replaced first and the result trimmed afterwards.
			// NOTE(review): assumes Tools.replaceNonBreakingSpace maps them to
			// regular spaces (or removes them) - confirm.
			String docId = Tools.replaceNonBreakingSpace(element.ownText())
					.trim();
			super.setDoc_id(docId);
		}
	}

	/**
	 * Sets the language to the last two characters of the lower-cased page
	 * URL, after removing a {@code &language=en} parameter.
	 * <p>
	 * Example URL:
	 * {@code http://www.europarl.europa.eu/sides/getDoc.do?pubRef=-//EP//TEXT+IM-PRESS+20120410STO42632+0+DOC+XML+V0//EN&language=EN}
	 *
	 * @param language
	 *            fallback used only when the page URL is {@code null} or too
	 *            short to contain a two-character code
	 */
	@Override
	public void setLanguage(String language) {
		String pageUrl = this.getPage_url();
		if (pageUrl != null) {
			// Locale.ROOT keeps the conversion independent of the JVM default
			// locale (e.g. Turkish lower-cases 'I' to a dotless 'i').
			String stripped = pageUrl.toLowerCase(Locale.ROOT).replace(
					LANGUAGE_PARAM_TRAILING, "");
			if (stripped.length() >= LANGUAGE_CODE_LENGTH) {
				super.setLanguage(stripped.substring(stripped.length()
						- LANGUAGE_CODE_LENGTH));
				return;
			}
		}
		super.setLanguage(language);
	}

	/**
	 * Resolves the {@code href} of the element matched by the given selector
	 * against that element's base URI, stores it as the attachment URL and
	 * adds a note about the attachment. Nothing happens when no element
	 * matches.
	 *
	 * @param tagValue
	 *            selector of the attachment link
	 */
	@Override
	public void setUrlAttachment(String tagValue) {
		Element attachmentElement = super.getElement(tagValue);
		if (attachmentElement != null) {
			super.setUrlAttachment(UrlResolver.resolveUrl(
					attachmentElement.baseUri(), attachmentElement.attr("href")));
			this.addNotes("Full text in pdf.");
		}
	}

	/**
	 * Derives the document id from a document URL: the {@code getDoc.do?}
	 * prefix is removed, the remainder is lower-cased and the
	 * {@code language=en} query parameter is dropped.
	 * <p>
	 * Example URL:
	 * {@code http://www.europarl.europa.eu/sides/getDoc.do?pubRef=-//EP//TEXT+IM-PRESS+20090218BRI49890+ITEM-COVER-EN+DOC+XML+V0//EN&language=EN}
	 *
	 * @param doc_id
	 *            document URL; when {@code null} no document id is set
	 */
	public void setURLDocId(String doc_id) {
		if (doc_id == null) {
			return;
		}

		// The prefix is removed before lower-casing, so it is matched
		// case-sensitively.
		String id = doc_id.replace(GET_DOC_PREFIX, "")
				.toLowerCase(Locale.ROOT)
				.replace(LANGUAGE_PARAM_TRAILING, "")
				.replace(LANGUAGE_PARAM_LEADING, "");

		super.setDoc_id(id);
	}
}
